clear all

*Define global path for replication package
global path "~/Dropbox/IT_Revolution/Replication_package/JPE submission"

global path_rawdata "$path/Raw_data"
global path_cleandata "$path/Clean_data"
global path_output "$path/Output"

*******************************************************************************************
**** (1) ICT: Latter period ***************************************************************
*******************************************************************************************
** alternative from direct requirements
import excel "$path_rawdata/ICT_share/NAICSUseSummary_ICT.xlsx", sheet("Sheet2") firstrow clear
keep Desc NAICS ICT_Share
rename ICT_Share value

** generate averages at the 2 and 3 digit level so that we can map to the Census codes later
gen NAICS2 = substr(NAICS,1,2)
gen NAICS3 = substr(NAICS,1,3)

bys NAICS3: egen value_3 = mean(value)
bys NAICS2: egen value_2 = mean(value)

cap mkdir "$path_cleandata"

preserve
keep NAICS3 value_3
duplicates drop
save "$path_cleandata/ICT_shares_3digit", replace
restore

preserve
keep NAICS2 value_2
duplicates drop
save "$path_cleandata/ICT_shares_2digit", replace
restore

drop NAICS2 NAICS3 value_*
save "$path_cleandata/ICT_shares", replace

** Now we need a concordance to the Census industry codes from the excel file in the census website 
import excel "$path_rawdata/Electricity share/industry-crosswalk-90-00-02-07-12.xls", sheet("1990IND_1997NAICS") cellrange(A1:B292) firstrow clear
replace Census = subinstr(Census,"+","", .)
replace Census = subinstr(Census,"-", "",.)
replace Census = subinstr(Census,"*","", .)
replace Census = subinstr(Census,"p","", .)
replace Census = subinstr(Census,"**","", .)
replace Census = subinstr(Census,"/","", .)

replace Census = strtrim(Census)
replace Census = "" if Census=="New"| Census=="New2"| Census=="New3"
drop if Census==""
destring Census, replace

split NAICS, p(",")
drop NAICS

foreach var of varlist NAICS*{
replace `var' = strtrim(`var')

replace `var' = substr(`var',1,4)
}

***Some Census are duplicated, fix this
bysort Census: gen dups=_N
tostring Census, replace
local count 1
qui sum dups
while r(max)>1{
replace Census=Census+"_`count'" if Census==Census[_n-1]
drop dups
bysort Census: gen dups=_N
qui sum dups
local count=`count'+1
}

drop dups
reshape long NAICS, i(Census) j(count)
replace Census = substr(Census,1,3)
destring Census, replace
drop count
duplicates drop
drop if NAICS==""

replace NAICS = strtrim(NAICS)
gen NAICS2 = substr(NAICS,1,2)
gen NAICS3= substr(NAICS,1,3)

save "$path_cleandata/NAICS_ind1990_cw", replace


*** Now merge in shares
use "$path_cleandata/NAICS_ind1990_cw", clear

merge m:1 NAICS using "$path_cleandata/ICT_shares"

drop if _m==2
drop _m

merge m:1 NAICS3 using "$path_cleandata/ICT_shares_3digit"
drop if _m==2
replace value = value_3 if value==.
drop value_3
drop _m

merge m:1 NAICS2 using "$path_cleandata/ICT_shares_2digit"
drop if _m==2
replace value = value_2 if value==.
drop value_2
drop _m

drop if Census==.
rename Census ind1990
replace value = 0 if value==.
keep ind1990 value

collapse (mean) value, by (ind1990)

preserve
save "$path_cleandata/disaggregated_ict_Census", replace
restore

** now: noise elimination by grouping things
xtile value_q = value, nq(4)
bys value_q: egen temp = mean(value)
replace value = temp
drop value_q temp

sum value

save "$path_cleandata/ICT_shares_final", replace

****************************************************************************************
***** (2) Electricity exposure: early period *******************************************
****************************************************************************************

** First, construct concordance
import excel "$path_rawdata/Electricity share/Crosswalk.xlsx", sheet("IND1950_SIC1957") cellrange(A5:C188) firstrow clear
drop if IND1950==.
drop if BEA1947_IO==""
split BEA1947_IO, p(",")
drop BEA1947_IO
reshape long BEA1947_IO, i(IND1950 B) j(map, str)
drop map
drop if BEA1947_IO==""
destring BEA1947_IO,replace
rename B sector_desc

save "$path_cleandata/ind1950_SIC57", replace


** Now import electricity shares
import excel "$path_rawdata/Electricity share/1947_IO_Table/Electricity_shares.xlsx", sheet("1947_Transactions_85_detail_Dat") firstrow clear

keep ProducingIndustry ConsumingIndustry TotalTransactions1947Dollars TotalTransactions1958Dollars

collapse (sum) TotalTransactions1947Dollars TotalTransactions1958Dollars, by(ProducingIndustry)

rename ProducingIndustry Industry

save "$path_cleandata/electricity_shares", replace

import excel "$path_rawdata/Electricity share/1947_IO_Table/Electricity_shares.xlsx", sheet("Sheet 1") cellrange(A2:F90) firstrow clear

rename PurchasesofElectricity utilities
rename PurchasesofElectricalIn electric_equipm
rename PurchasesofElectricligh electric_lighting
rename PurchasesofElectriccomp electric_components
rename PurchasesofMiscelectri misc_electric_mach

drop if Industry==""


merge 1:1 Industry using "$path_cleandata/electricity_shares"
keep if _m==3
drop _m

gen tot_elect=0
foreach var of varlist utilities electric* misc_{
replace tot_elect=tot_elect+`var'
}

gen elect_share = tot_elect/TotalTransactions1947Dollars

drop tot_elect
gen tot_elect=0
foreach var of varlist  electric* misc_{
replace tot_elect=tot_elect+`var'
}

gen elect_share_noutil = tot_elect/TotalTransactions1947Dollars
drop tot_elect
save "$path_cleandata/electricity_shares" ,replace

** merge with concordance

destring Industry, replace
rename Industry BEA1947_IO

merge 1:m BEA1947_IO using "$path_cleandata/ind1950_SIC57"


foreach var of varlist elect_share* TotalTransactions1947Dollars{
replace `var'=0 if `var'==.
}

drop _m

** collapse to the average share in all industries the IND1950 is matched to in the BEA
collapse (mean) elect_share* [aw=TotalTransactions1947Dollars], by(IND1950 sector_desc)

rename IND1950 ind1950

gen manuf = (ind1950 > 300 & ind1950 < 500)

replace manuf=0 if ind1950<=300
replace manuf=0 if ind1950>=500

summ elect_share_noutil if manuf==1,det

xtile temp=elect_share_noutil if manuf==1, nq(3)
replace manuf=temp if manuf==1
drop temp

foreach var of varlist elect_share*{
bys manuf: egen m_`var' = mean(`var')
replace `var'=m_`var'
drop m_`var'
}
drop elect_share

save "$path_cleandata/electricity_shares", replace

** Clean up
erase "$path_cleandata/disaggregated_ict_Census.dta"
erase "$path_cleandata/ICT_shares_2digit.dta"
erase "$path_cleandata/ICT_shares_3digit.dta"
erase "$path_cleandata/ICT_shares.dta"
erase "$path_cleandata/NAICS_ind1990_cw.dta"

erase "$path_cleandata/ind1950_SIC57.dta"
